import os
import shutil
import requests
from PyPDF2 import PdfMerger
# Base output directory (a Windows path ending in a separator); a folder named
# after the requested date is created beneath it by function_main.
IMAGES_PATH = 'C:\\python\\YiwuBusiness\\'

def function_download(path, url, file, timeout=30):
    """Fetch *url* with an HTTP GET and save the response body to *file*.

    Args:
        path: Label shown in the progress message; it is only printed and
            never used for I/O.
        url: Address to download.
        file: Destination path, written in binary mode.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout a stalled connection would block the script forever.

    Returns:
        0 when the body was saved, 1 when the request or the write failed
        (the error is printed, not raised).
    """
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/102.0.0.0 Safari/537.36'}

    print('Downloading...', path)
    try:
        response = requests.get(url, headers=headers, timeout=timeout)
        # Turn HTTP 4xx/5xx answers into exceptions so nothing is written
        # to disk for a failed page.
        response.raise_for_status()
        with open(file, 'wb') as f:
            f.write(response.content)
    except (requests.RequestException, OSError) as exc:
        # Best-effort download: report the problem and let the caller decide
        # what to do with the failed page.
        print('%s' % (exc))
        return 1
    return 0

def function_pgcnt(pgfile):
    """Count the page links listed in a saved layout page.

    Args:
        pgfile: Path of a UTF-8 encoded HTML file.

    Returns:
        The number of lines in the file that contain ``<a href="node_``.
        Lines are counted, not links, so several links on one line count
        once.
    """
    marker = '<a href="node_'
    # The context manager closes the file even if decoding fails midway.
    with open(pgfile, encoding='utf-8') as file:
        return sum(1 for line in file if marker in line)

def _pgana_pdf_path(html):
    """Return the site-relative PDF path linked from *html*, or None.

    Only the first line containing both ``.pdf`` and ``<a href="`` is
    examined. The first quoted value on that line is expected to look like
    ``../../../<path>.pdf``; the text after the third ``..`` is returned.
    """
    for line in html.split('\n'):
        if '.pdf' in line and '<a href="' in line:
            # The '<a href="' match guarantees at least one quote, so
            # index 1 (the first quoted value) always exists.
            href = line.split('"')[1]
            parts = href.split('..')
            return parts[3] if len(parts) > 3 else None
    return None


def function_pgana(path, pagecnt, errorpg):
    """Download the PDF of every saved layout page and delete the page file.

    For each page number from 1 to *pagecnt* the file ``<NN>.html`` in
    *path* is read, its PDF link is extracted, and the PDF is saved next to
    it as ``<NN>.pdf``. The HTML file is removed afterwards.

    Args:
        path: Directory holding the ``<NN>.html`` files.
        pagecnt: Number of pages to process.
        errorpg: Two-digit label of a page to skip (its HTML download is
            known to have failed); any non-matching value skips nothing.

    Pages whose HTML file is missing, or whose HTML has no usable PDF link,
    are reported and skipped instead of aborting the whole run.
    """
    for page in range(1, pagecnt + 1):
        # Zero-pad the actual page number; padding based on the zero-based
        # counter would turn page 10 into '010'.
        pg = '%02d' % page
        if pg == errorpg:
            continue
        pgfile = os.path.join(path, pg + '.html')
        try:
            with open(pgfile, encoding='utf-8') as file:
                urlcontents = file.read()
        except FileNotFoundError:
            print('Skipping page %s: %s not found' % (pg, pgfile))
            continue
        pdfilename = _pgana_pdf_path(urlcontents)
        if pdfilename is None:
            # Keep the HTML file so the unexpected markup can be inspected.
            print('Skipping page %s: no PDF link found in %s' % (pg, pgfile))
            continue
        pdfurl = 'http://szb1.ywcity.cn' + pdfilename
        pgname = os.path.join(path, pg + '.pdf')
        function_download(pgname, pdfurl, pgname)
        os.remove(pgfile)

def function_pdfmerge(pdf_lst, pdfname):
    """Concatenate the given PDF files into a single output file.

    Args:
        pdf_lst: Paths of the PDFs to merge, in the order they should
            appear in the result.
        pdfname: Path of the merged PDF to write.
    """
    file_merger = PdfMerger()
    try:
        for pdf in pdf_lst:
            file_merger.append(pdf)
        file_merger.write(pdfname)
    finally:
        # Release the input file handles the merger opened, even when an
        # append or the final write fails.
        file_merger.close()


def function_main():
    """Download one day's layout pages and merge their PDFs into one file.

    Prompts for a date in ``YYYYMMDD`` form, creates a folder named after it
    under ``IMAGES_PATH``, downloads the first layout page to learn the page
    count, downloads the remaining pages, fetches every page's PDF and
    finally merges them into ``<YYYYMMDD>.pdf`` inside that folder.

    Returns:
        0 on success, 1 when the input is invalid, the first page cannot be
        downloaded, or no page PDF is available to merge.
    """
    stamp = input("请输入年月YYYYMMDD: ").strip()
    if len(stamp) != 8 or not stamp.isdigit():
        print('Invalid date %r: expected 8 digits (YYYYMMDD)' % stamp)
        return 1
    ym, date = stamp[:6], stamp[6:8]

    path = os.path.join(IMAGES_PATH, stamp)
    # makedirs also creates a missing base folder and tolerates re-runs.
    os.makedirs(path, exist_ok=True)

    node_url = 'http://szb1.ywcity.cn/layout/' + ym + '/' + date + '/node_'

    ftpg = '01'
    ftpgfile = os.path.join(path, ftpg + '.html')
    if function_download(path, node_url + ftpg + '.html', ftpgfile) == 1:
        # Without the first page the page count is unknown.
        print('Cannot download the first page; nothing to do')
        return 1
    pagecnt = function_pgcnt(ftpgfile)

    # function_pgana accepts a single page label to skip, so only the most
    # recent failure can be forwarded to it.
    errorpg = '0'
    # Page 01 is already on disk, so start from page 2.
    for cnt in range(2, pagecnt + 1):
        pg = '%02d' % cnt
        wbpgfile = os.path.join(path, pg + '.html')
        if function_download(path, node_url + pg + '.html', wbpgfile) == 1:
            errorpg = pg
    function_pgana(path, pagecnt, errorpg)

    pdfname = os.path.join(path, stamp + '.pdf')
    merged_basename = os.path.basename(pdfname)
    # os.listdir returns names in arbitrary order, so sort to keep the pages
    # in sequence, and leave out a merged file left over from an earlier run.
    pdf_lst = sorted(
        os.path.join(path, filename)
        for filename in os.listdir(path)
        if filename.endswith('.pdf') and filename != merged_basename
    )
    if not pdf_lst:
        print('No page PDFs were downloaded; nothing to merge')
        return 1
    function_pdfmerge(pdf_lst, pdfname)
    return 0

# Run the interactive download-and-merge workflow only when this file is
# executed as a script, not when it is imported.
if __name__ == "__main__":
    function_main()